### Racial Social Norms in Brazilian Schools - Main file

# This file runs all the analysis for "Racial Social Norms among Brazilian
# Students: Academic Performance, Social Status and Racial Identification".
# I start by preparing the data, which includes deleting observations based on
# the number of missing observations within each classroom.
# Then, I create variables based on students' friendship networks,
# including the popularity index of Fryer and Torelli.
# Next, I run the main analysis of the paper, relating students' performance,
# social status, and race. Finally, I run several robustness checks.

####### Cleaning working space
# Remove every object from the current environment, including hidden ones
# whose names start with a dot (all.names = TRUE), so the run starts from an
# empty workspace. This call only removes objects; it does not detach
# packages or reset options.
rm(list = ls(all.names = TRUE)) 
# Run garbage collection to release the memory held by the removed objects.
gc() 

library(dplyr)
library(network)
library(sna)
library(ggplot2)
library(ggnetwork)
library(psych)
library(data.table)
library(cobalt)  
library(np) 
library(ggthemes)
library(xtable)
library(stargazer)
library(ggpubr)
library("lmtest")
library("sandwich")
library("plm")
library(stringr)
library(rlang)
library(car)
library(ic.infer)
library(multiwayvcov)
library("pscl")


####### Setting working environment --------

# Working folder should be the one where the data and code files are stored.
# NOTE: this is a machine-specific absolute path; edit it to point to your
# local copy of the files before running. setwd() stops with an error if the
# folder cannot be reached.
working_folder <- "C:/Users/Alysson/Dropbox/@Doc/@Tese/artigo_cabreuva/artigo_insper/version_pnas/codes/version_harvard_dataverse_sym"
setwd(working_folder)

# During the process, I will create a few new folders: intermediary_outputs,
# tables, and figures. Each folder is created only if it does not exist yet,
# so re-running this file does not emit "already exists" warnings, while a
# genuine failure to create a folder is still reported by dir.create().
if (!dir.exists("intermediary_outputs")) {
  dir.create("intermediary_outputs")
}

# Select the sample that I will work with, based on exclusion criteria for
# class size and situation in school:
source("01_select_sample_en.R", encoding = "UTF-8")

# Once the data for the main analysis is chosen, I have to exclude the
# invalid observations from the networks and create a flag indicating which
# observations are not valid.
source("02_prepare_networks_en.R", encoding = "UTF-8")

# Next, I create the variables of interest:
source("03_create_vars_en.R", encoding = "UTF-8")

# Next, I have to compute variables using the networks data and add them
# to my main dataset. This one takes a long time...
source("04_compute_index_en.R", encoding = "UTF-8")

# Next, I compute the grades score and the potential supply of friendships:
source("05_grades_score_en.R", encoding = "UTF-8")

# Create the tables and figures folders (skipped when they already exist):
if (!dir.exists("tables")) {
  dir.create("tables")
}
if (!dir.exists("figures")) {
  dir.create("figures")
}

# Next, I move to the descriptive analysis:
source("06_descriptive_en.R", encoding = "UTF-8")

# Examples of the social status measure:
source("07_example_ssi_en.R", encoding = "UTF-8")

# Nonparametric analysis:
source("08_nonparametric_analysis_en.R", encoding = "UTF-8")

# Sub-folder of "tables", created just before the main regressions are run
# (skipped when it already exists):
if (!dir.exists("tables/hetero")) {
  dir.create("tables/hetero")
}

# Run main regressions:
source("09_run_regs_en.R", encoding = "UTF-8")

# Robustness checks:
source("10_robustness_checks_en.R", encoding = "UTF-8")

# Don't forget to run the Stata do-files to get the outcomes of our
# logit analysis.


